# The code was tested on:
  # Version 3.5.2 (2018-12-20) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows >= 8 x64 (build 9200)
  # and
  # R version 3.5.0 (2018-04-23) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS High Sierra 10.13.6

# The run time for the entire code is ~ 6hrs

rm(list = ls(all=TRUE)) 

# setwd() to the source file location!

# The code replicates all results in the paper and the appendix, but *not* in the order they are reported, because the analyses jump between different levels (rayon-level, individual level, auxilliary datasets). The basic structure of the replication is as follows:
# 1. Cleanup variable names and prepare rayon-level dataset
# 2. Compute rayon-level weather instruments
# 3. Replicate all rayon-level results
# 4. Replicate results using auxilliary datasets: election fraud, newspapers, rada speeches
# 5. Merge weather data with survey data and replicate *all* survey level results

set.seed(123)
packages <- c("spdep", "lfe", "mgcv", "Formula", "splines", "BMA", "parallel", "stargazer", "dplyr", "plyr", "glmnet", "tikzDevice", "stringr", "stringi", "multiwayvcov", "DirectEffects", "RColorBrewer", "gridExtra", "lubridate", "ggplot2", "lmf", "zoo", "countrycode","xtable","maptools","raster","sp","rgeos","fields","ncdf4","gdata","gmodels","Matching")
new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages, dependencies = TRUE)
lapply(packages, require, quietly = TRUE, character.only = TRUE)
source("functions.R")

labels <- c("Opposition to Red partisans (1941-1944)", "Anti-Soviet vote (1946-1958)", "Anti-Soviet protests (1987-1991)", "Anti-Russian vote (2002-2014)", "Anti-Yanukovych protests (2009-2013)")
labels_short <- strsplitm(labels, "\\(", 1)


#############################################################
# Clean up / simplify variables
#############################################################

load("main_data.Rdata")

data$ukrainian <- data$C1926_UKRAINIAN/data$C1926_TOTAL
data$russian <- data$C1926_RUSSIAN/data$C1926_TOTAL
data$rural <- data$C1926_TOTAL_RUR/data$C1926_TOTAL
data$famine <- log(100*data$FAMINE_TOTAL/(data$FAMINE_TOTAL + data$POP1933_TOTAL))
# Population weights (shorter name)
data$w <- data$C1926_TOTAL

# Rescale outcomes by type so that higher values represent more "anti-Soviet/Moscow" behavior
# note "PRO-RUSSIAN" here is actually "ANTI-RUSSIAN" because of 100-X)
d <- colnames(data@data)
data@data[, d[grep("P2_BASES_K050_\\d{4}",  d)]] <- -data@data[, d[grep("P2_BASES_K050_\\d{4}",  d)]]
data@data[, d[grep("P2_BASES_K025_\\d{4}",  d)]] <- -data@data[, d[grep("P2_BASES_K025_\\d{4}",  d)]]
data@data[, d[grep("P2_BASES_K075_\\d{4}",  d)]] <- -data@data[, d[grep("P2_BASES_K075_\\d{4}",  d)]]
data$P2_BASES_K050 <- apply(data@data[, d[grep("P2_BASES_K050_\\d{4}",  d)]], 1, mean, na.rm = TRUE)
data$P2_BASES_K025 <- apply(data@data[, d[grep("P2_BASES_K025_\\d{4}",  d)]], 1, mean, na.rm = TRUE)
data$P2_BASES_K075 <- apply(data@data[, d[grep("P2_BASES_K075_\\d{4}",  d)]], 1, mean, na.rm = TRUE)
data$AGAINSTALL <- apply(data@data[, d[grep("AGAINSTALL",  d)]], 1, mean, na.rm = TRUE)
data$PRT_TOP2 <- data$PRT_ANTIUSSR + data$PRT_INDEP
data$PRORUSSIAN_2002 <- data$PRORUSSIAN_2002/100
data@data[, d[grep("^PRORUSSIAN", d)]] <- 100 - 100*(data@data[, d[grep("^PRORUSSIAN", d)]])
data$PRORUSSIAN <- apply(data@data[, d[grep("^PRORUSSIAN", d)]], 1, mean, na.rm = TRUE)

# labels of outcome variables (averaged)
y <- c("P2_BASES_K050", "AGAINSTALL", "PRT_TOP2", "PRORUSSIAN", "MAIDAN_ANTIGOV")

#############################################################
# Prepare weather variables (create adversity index)
#############################################################

# Weather matrix (transpose to do normalization and map back):
W.build <- function(data, time.int = "1931|1932"){
  d <- data@data
  W <- subset(d, select = colnames(d)[which(grepl("W_TEMP|W_RAIN", colnames(d)) & !grepl("_S", colnames(d)))])
  W <- data.frame(event = substr(colnames(W), 3, 6), year = substr(colnames(W), 8, 11), month = substr(colnames(W), 12, 13), t(W))
  w <- aggregate(. ~ event + month, FUN = median, data = subset(W, year %in% 1926:1930, select = -c(year)))
  W <- merge(W, w, by = c("event", "month"), all.x =  TRUE)
  W <- W[order(W$event, W$year, W$month), ]
  W <- data.frame(var = paste("NORM", W$event, W$year, W$month, sep = "_"), W[,grep("\\.x", colnames(W))] - W[,grep("\\.y", colnames(W))], stringsAsFactors=FALSE)
  D <- t(W[, -1])
  colnames(D) <- W[,1]
  rownames(D) <- gsub("X|\\.x", "", rownames(D))
  D <- D[, grep(time.int, colnames(D))]
  return(D)
}

Weather <- W.build(data)

# First-stage specification
First <- Formula(famine ~ ukrainian + russian + rural + FOREST + I(1 - INDUSTRY_NONE) + AGRO_SHORT + log(C1926_TOTAL) + ns(DIST2RU, 3)| NAME_1)

out <- lm(formula(update(First, famine ~ . + NAME_1), rhs = 1), data = data, weights = data$w)
X  <- cbind(model.matrix(out), Weather)
Y <- data$famine

# Hand-selected weather variables
data$W_hand_selected <- Weather[, c("NORM_TEMP_1931_03", "NORM_TEMP_1931_08", "NORM_TEMP_1932_03", "NORM_TEMP_1932_07",  "NORM_RAIN_1931_07", "NORM_RAIN_1932_07")]

# BMA selected weather variables
BMA <- bicreg(X, y = Y, wt = data$w)
w.star <- sort(names(which(BMA$which[1, ])))
w.star <- w.star[grep("_TEMP|_RAIN", w.star)]
data$W_BMA_selected <- scale(Weather[, w.star])

# BMA averaged weather variables
w <- BMA$namesx[grep("_TEMP|_RAIN", BMA$namesx)]
data$W_BMA_averaged <- scale(Weather[,w]%*%BMA$postmean[w])
# Calculate number of "effective" resitrictions (number of variables used to calculate the weather adversity index, weighted by inclusion probability):
Q <- sum(BMA$probne0[grep("_TEMP|_RAIN", BMA$namesx)]/100)

# LASSO selection of weather variables
cv <- cv.glmnet(X, Y, weights = data$w, alpha = 1, nfolds = 10)
# Adjust CV-penalty to select 20 or fewer instruments (otherwise, too many instruments selected)
lasso.out <- glmnet(X, Y, lambda = 10*cv$lambda.min, alpha = 1, weights = data$w)
b <- as.matrix(lasso.out$beta)
b <- b[rownames(b)%in%colnames(Weather),]
b <- b[b!=0]
data$W_LASSO <- as.matrix(Weather[,names(b)])

main_data <- data # to use when "data" object gets overidden

############################################
# Figure 1: FAMINE DEATHS AND WEATHER ADVERSITY
############################################

data$famine_nat <- exp(data$famine)
famine <- spplot(data, "famine_nat", main = "Famine deaths (percent)", col.regions = brewer.pal(n = 9, name = "OrRd"), cuts = 8, col = "grey", par.settings = list(axis.line = list(col =  'transparent')), colorkey = list(space = "bottom"))
weather <- spplot(data, "W_BMA_averaged", main = "Weather adversity (standard deviations)", col.regions = brewer.pal(n = 9, name = "Blues"), cuts = 8, col = "grey", par.settings = list(axis.line = list(col =  'transparent')), colorkey = list(space = "bottom"))  
grid.arrange(famine, weather, nrow = 1)

#############################################################
# Table A4.3: First stage estimates for different instruments
#############################################################

# Labels of outcome variables
y <- c("P2_BASES_K050", "AGAINSTALL", "PRT_TOP2", "PRORUSSIAN", "MAIDAN_ANTIGOV")

# Models for each set of instruments
Model <- NULL
Model[[1]] <- formula(update(First, scale(y) ~ . | . |  (famine ~ W_hand_selected)| NAME_1))
Model[[2]] <- formula(update(First, scale(y) ~ . | . |  (famine ~ W_BMA_selected)| NAME_1))
Model[[3]] <- formula(update(First, scale(y) ~ . | . |  (famine ~ W_BMA_averaged)| NAME_1))
Model[[4]] <- formula(update(First, scale(y) ~ . | . |  (famine ~ W_LASSO)| NAME_1))
Model <- lapply(Model, Formula)

# The coefficient reported in the text of the paper: "one standard deviation in weather adversity is associated with 2.9 percentage point greater famine deaths"
summary(felm(update(First, exp(famine) ~ . + W_BMA_averaged | NAME_1 | 0 | NAME_1), data = data, weights = data$w), robust = TRUE)

# First-stage results for each instrument
data$y <- data$P2_BASES_K050
f.out  <- lapply(Model, function(x) felm(x, data = data, weights = data$w, keepX = TRUE))
# The warning pertains only to LASSO model and it appears only when we cluster the standard errors. It is not clear why this warning shows up because the design matrix *has* full rank as can be seen be executing this line:
rankMatrix(f.out[[4]]$stage1$X)[1]==ncol(f.out[[4]]$stage1$X)
# From the documentation, seems like the call "felm" from "lfe" package internally conducts a joint F-test that all second-stage coefficients are equal to zero, and generates the  warning at that step. Note that "felm" documentation notes that the said F test is not reliable. We are not using the results of the said F test in our analyses at any point.

x <- lapply(f.out, function(x) x$stage1)

r <- function(x) {
  z <- gsub("W_hand_selected|W_BMA_selected|W_LASSO|NORM_", "", names(x$rse))
  z <- gsub("TEMP_", "Temperature: ", z)
  z <- gsub("RAIN_", "Precipitation: ", z)
  z <- gsub("W_BMA_averaged", "Weather index", z)
  z <- gsub("_", "-", z)
  rownames(x$coefficients) <- names(x$cse) <-  names(x$cpval) <- z
  x
}

x   <- x[c(3, 1, 2, 4)]
f.out <- lapply(x, r)

stargazer(f.out, coef = lapply(f.out, function(x) x$coefficients), se = lapply(f.out, function(x) x$cse), p = lapply(f.out, function(x) x$cpval), keep = c("Temperature|Precipitation|Weather index"), single.row = TRUE, align = TRUE, digits = 2, no.space = TRUE, column.sep.width = "5pt", dep.var.caption = "Instrument selection method:", column.labels = c("Index", "Manual", "BMA", "LASSO"), dep.var.labels = "", keep.stat = c("n","adj.rsq"), add.lines = list(c("Robust $F$-statistic", unlist(lapply(x, function(x) roundr(summary(x, robust = TRUE)$rob.iv1fstat[[5]], 2))))), font.size = "small", notes.align = "l", title = "First stage estimates for different instruments, adjusted for oblast fixed effects and the covariates. Standard errors in parentheses clusterred by oblast.", label = "tab:first_stage")

#############################################################
# Table A1.1: Summary statistics for the main variables
#############################################################
X.descriptive <- cbind(exp(data$famine), data$famine, data@data[,y], model.matrix(out)[,2:10], data$DIST2RU)
stargazer(X.descriptive, notes.align = "l", title = "Summary statistics for the main variables.", covariate.labels = c("Famine mortality (percent)", "Famine mortality (percent logged)", "Opposition to Red partisans", "Anti-Soviet votes (percent)", "Anti-Soviet protests (count)", "Anti-Russian votes (percentage)", "Anti-Russian protests (count)", "Ukrainians in 1926 (proportion)", "Russians in 1926 (proportion)", "Rural population in 1926 (proportion)", "Forestation", "Manufacturing", "Dominant crop: dairy", "Dominant crop: potato", "Dominant crop: wheat", "Population in 1926 (log)", "Distance to Russian border (km)"), font.size = "normalsize", digits = 2, label = "tab:summarystat")

#############################################################
# Table 3: FAMINE MORTALITY AND POST-FAMINE AVERAGED OUTCOMES
#############################################################

# labels of outcome variables (averaged)
y <- c("P2_BASES_K050", "AGAINSTALL", "PRT_TOP2", "PRORUSSIAN", "MAIDAN_ANTIGOV")
# IV 
out.iv <- Iv.reg(y = y, Model = Model, data = data)
# OLS
out.ols <- Iv.reg(y = y, Model = update(First, scale(y) ~ . + famine | NAME_1 | 0 | NAME_1), data = data)
tab1 <- temp(out.iv[[3]])
tab2 <- temp(out.ols[[1]])
cbind(tab2[,1:2], tab1[,2:3])

###############################################################
#  Table A6.4: Second-stage IV estimates with pre-famine covariates.
###############################################################
out <- lapply(out.iv[[3]], function(x) x$ivm)
stargazer(out, coef = lapply(out, function(x) x$coefficients), se = lapply(out, function(x) x$cse), p = lapply(out, function(x) x$cpval), omit = c("Moran|DIST2RU"), single.row = FALSE, align = TRUE, digits = 2, no.space = TRUE, column.sep.width = "-5pt", dep.var.labels = "", keep.stat = c("n","adj.rsq"), font.size = "small", notes.align = "l", title = "Second-stage IV estimates with pre-famine covariates. Weather adversity index is used as instrument. Coefficients for Oblast fixed effects, cubic spline for distance to Russia, and spatial Moran eigenvectors are not shown. Standard errors adjust for clustering at oblast level.", type = "latex", covariate.labels = c("Ukrainians (1926)", "Russians (1926)", "Rural population (1926)", "Forestation", "Industrialization", "Dominant crop: dairy", "Dominant crop: potato", "Dominant crop: wheat", "Population (1926)", "Famine"), notes.append=FALSE, dep.var.caption = "", column.labels =  strsplitm(labels, "\\(", 1), add.lines = list(c("Moran's I (p-value)", roundr(unlist(lapply(out.iv[[3]], function(x) x$p)), 2))))

###############################################################
# Table A6.5: IV RESULTS WHEN DEPENDENT VARIABLES ARE NOT NORMALIZED
###############################################################
out.iv_unscaled <- Iv.reg(y = y, Model[[3]], data = data, scale = FALSE)[[1]]
temp(out.iv_unscaled)

###############################################################
#  Table A7.6: Additional IV results
###############################################################

# Create contemporanenous weather shocks
# Calculate Weather index for contemporaneous weather:
w_data <- get(load("weather_adm2_1933_na3.RData"))
w_data <- subset(w_data, !ID%in%as.character(c(244, 245, 246, 248, 249, 250)))
# Create interval from which weather variables are taken
make_int <- function(x) paste(x-(2:1), collapse = "|")
# Create weather for a given year (if multiple years per outcome, then average)
make_W <- function(year){
  f      <- function(year){
    w_new  <- W.build(w_data, time.int = make_int(year))
    colnames(w_new) <- gsub(as.character(year-2), "1931", colnames(w_new))
    colnames(w_new) <- gsub(as.character(year-1), "1932", colnames(w_new))
    w <- BMA$namesx[grep("_TEMP|_RAIN", BMA$namesx)]
    scale(w_new[, w]%*%BMA$postmean[w])
  }
  apply(as.matrix(sapply(year, f)), 1, mean)
}

# Estimate all TSLS regressions in one list:
out.robust <- list(
  # Without population weights
  no.weights = Iv.reg(y = y, Model = Model[[3]], data = data, weights = NULL)[[1]],
  # Without synthetic spatial covariates
  no.moran   = Iv.reg(y = y, Model = Model[[3]], data = data, Moran = FALSE)[[1]],
  # Adjusting for contemporaneous weather
  weather = list(
    iv.reg(y = y[1], Model = update(Model[[3]], . ~ . + make_W(1941:1944)), data = data),
    iv.reg(y = y[2], Model = update(Model[[3]], . ~ . + make_W(c(1946, 1950, 1954, 1958))), data = data),
    iv.reg(y = y[3], Model = update(Model[[3]], . ~ . + make_W(1988:1991)), data = data),
    iv.reg(y = y[4], Model = update(Model[[3]], . ~ . + make_W(c(2002, 2004, 2006, 2007, 2010, 2012, 2014))), data = data),
    iv.reg(y = y[5], Model = update(Model[[3]], . ~ . + make_W(2009:2013)), data = data))
)

do.call("rbind", lapply(out.robust, temp))

###############################################################
# Table A7.7: IV results using alternative instruments.
###############################################################
do.call("rbind", lapply(out.iv[-3], temp))

###############################################################
#  Table A7.8: IV RESULTS FOR WWII OUTCOMES USING ALTERNATIVE MEASURES
###############################################################
out.ww2_alternative  <- Iv.reg(y = c("P2_BASES_K025", "P2_BASES_K075", "P_OPS_DIST_MIN", "P_OPS_K050"), Model = Model[[3]], data = data)[[1]]
temp(out.ww2_alternative)

#############################################################
# Table A7.9: IV RESULTS USING IMPUTED INSTRUMENT.
#############################################################
boot_BMA <- function(m){
  # Sample model
  Mod <- sample(1:nrow(BMA$which), 1, prob = BMA$postprob)
  X.sub <- data.frame(X)[, setdiff(colnames(data.frame(X)), BMA$dropped)[which(BMA$which[Mod, ])]]
  # Sample model-specific coeficients
  out <- lm(data$famine ~ as.matrix(X.sub), weights = data$w)
  b <- rmnorm(1, coefficients(out), vcov(out))
  names(b) <- gsub("as.matrix(X.sub)", "", names(b), fixed = TRUE)
  w <- grep("_TEMP|_RAIN", names(b), value = TRUE)
  # Calculate weather adversity
  data$W_BMA_averaged <- scale(Weather[,w]%*%b[w])
  # Estimate IV coefficients (fixing ME's from the baseline)
  out <- out.iv[[3]][[m]]
  data$Moran <- apply(out$Fit.moran$dataset, 2, scale)
  data$y <- data@data[, y[m]]
  ivm <- felm(update(Model[[3]], . ~ . + Moran), data = data, weights = data$w)
  data.frame(
    coef = unname(coef(ivm)["`famine(fit)`"]),
    coef.var = vcov(ivm)["`famine(fit)`", "`famine(fit)`"],
    f.stat = ivm$stage1$rob.iv1fstat$famine["F"]/length(w)
  )
}

# wrapper function
wrap_boot_BMA <- function(m){
  B <- replicate(1000, boot_BMA(m), simplify = FALSE)
  b <- unlist(lapply(B, function(x) x$coef))
  b.var <- unlist(lapply(B, function(x) x$coef.var))
  b.hat <- mean(b)
  se <- sqrt(mean(b.var) + sd(b) + sd(b)/length(b))
  p.val <- 2*pnorm(abs(b.hat/se), lower.tail=FALSE)
  f.stat <- mean(unlist(lapply(B, function(x) x$f.stat)))
  data.frame(b.hat, se, p.val, f.stat)
}

cbind(y, roundr(do.call("rbind", lapply(1:5, wrap_boot_BMA)), 2))

##########################################################
# Figure A7.3: Point estimates of the famine effect
##########################################################
remove_raion <- function(var){
  out <- out.iv[[3]][[var]]
  ndata <- subset(data, !rownames(data@data)%in%names(out$ivm$na.action))
  ndata$y <- ndata@data[,y[var]]
  ndata$Moran <- 1
  if(!is.null(out$Fit.moran)) ndata$Moran <- fitted(out$Fit.moran)
  g <- function(x){
    est    <- felm(update(out$Model, . ~ . + Moran), data = ndata[-x,], weights = ndata$w[-x])
    c(coefficients(est)["`famine(fit)`"], confint(est, "`famine(fit)`"))
  }
  boots <- t(sapply(1:nrow(ndata), g))
  data.frame(id = 1:nrow(boots), y[var], boots)
}

out_remove <- do.call("rbind", lapply(1:5, remove_raion))
out_remove$var <- factor(out_remove$y.var., labels = gsub(" ", " \n ", str_trim(strsplitm(labels, "\\(", 1))))
ggplot(out_remove, aes(x = id, y=X.famine.fit..)) + geom_errorbar(aes(ymin=V2, ymax=V3), color = "light blue") + geom_point() + facet_grid(. ~ var) + theme_bw() + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) + ylab("")

###############################################################
# Table A8.10: Reduced form relationships between weather adversity index....
###############################################################
# Load and prepare data for western Ukraine
wd <- get(load("data_adm2_west_na3.RData"))

# Create weather matrix (normalized shocks)
Weather_placebo <- W.build(wd)

# Predict famine in west based on model trained in east
wd$W_BMA_averaged <- scale(Weather_placebo[, w]%*%BMA$postmean[w])
wd$W_hand_selected <- Weather_placebo[, c("NORM_TEMP_1931_03", "NORM_TEMP_1931_08", "NORM_TEMP_1932_03", "NORM_TEMP_1932_07",  "NORM_RAIN_1931_07", "NORM_RAIN_1932_07")]
wd$W_BMA_selected <- Weather_placebo[, which(colnames(Weather_placebo)%in%colnames(data$W_BMA_selected))]
wd$W_LASSO <- Weather_placebo[, which(colnames(Weather_placebo)%in%colnames(data$W_LASSO))]

# Collect outcome variables and change names so that all is passed in the same Model:
d <- colnames(wd@data)
wd$P2_BASES_K050 <- -apply(wd@data[, d[grep("P2_BASES_K050_\\d{4}$",  d)]], 1, mean, na.rm = TRUE)
wd$AGAINSTALL <- apply(wd@data[, d[grep("AGAINSTALL",  d)]], 1, mean, na.rm = TRUE)
wd$PRT_TOP2 <- wd$PRT_ANTIUSSR + wd$PRT_INDEP
wd$PRORUSSIAN <- 100 - apply(wd@data[, d[grep("^PRORUSSIAN", d)]], 1, mean, na.rm = TRUE) 
wd$ukrainian <- wd$P1931_UKR
wd$russian <- wd$P1931_RUS
wd$C1926_TOTAL <- wd$P1931_POP
wd$rural <- 100 - wd$URBAN1945_PCT_MINMIN
wd$w <- wd$C1926_TOTAL
wd$w[is.na(wd$w)] <- mean(wd$w, na.rm = TRUE)
wd$NAME_1 <- factor(wd$NAME_1)
wd@data <- droplevels(wd@data)

p.model <- update(First, scale(y) ~ . + W_BMA_averaged)
placebo <- list(
  west = Iv.reg(y, Model = p.model, data = wd)[[1]],
  east =  Iv.reg(y, Model = p.model, data = data)[[1]]
)


placebo.table <- function(x){
  f <- function(x){
    z <- summary(x$ivm, robust = TRUE)$coefficients["W_BMA_averaged", ]
    p <- c( "{**}", "{*}", "{\\dagger}", "{}")[cut(z[4], c(-1, 0.01, 0.05, .1, 1), labels=1:4)]
    paste(roundr(z[1], 2), " ~ (", roundr(z[2], 2), ")^", p, sep = "")
  }
  do.call("rbind", lapply(x, f))
}

cbind(labels, do.call("cbind", lapply(placebo, placebo.table)))

####################################################
# Table A8.11: Additional placebo tests
####################################################

placebo_out <- function(p.model){
  g <- function(y, data){
    data$y <- data@data[,y]  
    fit <- felm(p.model, data = data, weights = data$w)
    w <- grep("TEMP|RAIN|W_", names(coefficients(fit)), value = TRUE)
    data.frame(y = y, var = sub("_", "-", sub("_", ": ", strsplitm(w, "NORM_", 2))), coef = coefficients(fit)[w], p = summary(fit, robust = TRUE)$coefficients[w,4])
  }
  out <- data.frame(
    do.call("rbind", lapply(y, g, data = data)),
    do.call("rbind", lapply(y, g, data = wd))
  )
  out <- data.frame(y = y, placebo_fail = (out$p < 0.1)&(out$p.1 < 0.1)&sign(out$coef) == sign(out$coef.1))
  out$y <- factor(out$y, levels = y)
  tapply(out$placebo_fail, out$y, function(x) paste(as.character(sum(x)), as.character(length(x)), sep = "/"))
}

out <- data.frame(rbind(
  placebo_out(p.model = update(First, scale(y) ~ . + W_BMA_averaged)),
  placebo_out(p.model = update(First, scale(y) ~ . + W_hand_selected)),
  placebo_out(p.model = update(First, scale(y) ~ . + W_BMA_selected)),
  placebo_out(p.model = update(First, scale(y) ~ . + W_LASSO))
))

colnames(out) <- labels_short
rownames(out) <- c("Weather index", "Manual selection", "BMA selection", "LASSO selection")
out




############################################################################
# Figure A10.7 The effects of famine on behavioral opposition to Moscow
# Figure 2: FAMINE EFFECTS ON RED PARTISANS BY YEAR (subfigure of the above)
# Figure A13.12: The effects of famine on pro-Russian votes (subfigure of the above)
############################################################################

# The function below is for regressions with yearly data (WW2 and elections)
# Function to estimate IV effects by year for each column of "d" conditional on ME's estimated for "var" (row average of "d")
roll.yearly <- function(var, d, x.label = NULL, main = "", out = NULL, add = FALSE){
  
  if(is.null(out)){
    
    if(formals(iv.reg)$Moran){  
      # Extract ME's from baseline
      iv.moran <- iv.reg(var, Model = Model[[3]], data = data)
      data$S   <- fitted(iv.moran$Fit.moran)
    }
    
    out <- NULL
    for (i in 1:ncol(d)){
      data$y <- d[,i]
      model  <- Model[[3]]
      # Add Moran EV's from baseline
      if(!is.null(data$S)) model <- update(model, . ~ . + S)
      out[[i]] <-  iv.reg(y = NULL, Model = model, data = data, Moran = FALSE)$ivm
    }
  }
  
  f <- function(x){
    w <- grep("famine", names(coefficients((x))))
    c(coefficients(x)[w], confint(x)[w,])
  }
  
  Y <- do.call("rbind", lapply(out, f))
  
  p <- function(){
    par(mar = c(4, 3, 2, 1))
    plot(NULL, xaxt = "n", xlim = c(1, length(out)), ylim = range(c(Y)), xlab = "", ylab = "IV coefficient", las = 2, lwd = 4, main = main)
    if(add){
      # Calculate areas for Stalingrad battle:
      x0 <- (ymd("1942-8-23") - ymd("1940-1-1"))/365
      x1 <- (ymd("1943-2-2") -  ymd("1940-1-1"))/365
      polygon(c(x0, x0, x1, x1), c(-3, 3, 3, -3),  col =  adjustcolor("grey", alpha.f = 0.25), border = NA)
      text((x0+x1)/2, min(Y), "Staliningrad", pos = 3, font = 2)
      # Calculate areas for Kursk battle:
      x0 <- (ymd("1943-7-5") - ymd("1940-1-1"))/365
      x1 <- (ymd("1943-8-23") -  ymd("1940-1-1"))/365
      text((x0+x1)/2, min(Y), "Kursk", pos = 3, font = 2)
      polygon(c(x0, x0, x1, x1), c(-3, 3, 3, -3),  col =  adjustcolor("grey", alpha.f = 0.25), border = NA)
    }
    points(1:length(out), Y[,1], type = "o", xaxt = "n", pch = 16, ylim = range(c(Y)), xlab = "", ylab = "IV coefficient", las = 2, lwd = 1.5, main = main, cex = 1.5)
    abline(v = c(18, 34, 35), col =  adjustcolor("grey", alpha.f = 0.25), lwd = 20)
    if(is.null(x.label)) x.label <- 1:length(out)
    axis(1, at = 1:length(out), x.label)
    #abline(h = mean(Y[,1]), col = "dark blue", lty = 3, lwd = 3)
    abline(h = 0, lty = 3, lwd = 3)
    segments(1:length(out), Y[,2], 1:length(out), Y[,3],  col = "black", xlab = "", lwd = 3)
  }
  
  p()
  
}

# Red partisans by year
d <- data@data[ , paste(y[1], 1941:1944, sep = "_")]
roll.P2_BASES <- roll.yearly(var = y[1], d = d, x.label = 1941:1944, main = "Opposition to Red partisans", add = TRUE)

# Soviet elections by year
d <- data@data[ ,paste(y[2], c("1946", "1950", "1954", "1958"), sep = "_")]
roll.AGAINST_ALL <- roll.yearly(var = y[2], d = d, x.label = c("1946", "1950", "1954", "1958"), main = "Anti-Soviet Votes")

# Ukrainian elections by year
d <- data@data[ ,paste(y[4], c("2002", "2004", "2006", "2007", "2010", "2012", "2014", "2015"), sep = "_")]
roll.ANTI_RUSSIAN <- roll.yearly(var = y[4], d = d, x.label = c("2002", "2004", "2006", "2007", "2010", "2012", "2014 (pres.)", "2014 (parl.)"), main = "Anti-Russian Votes")

# Function to estimate rolling IV regressions for daily data (protests)
roll.daily <- function(var, d, out = NULL, K = 30, W = 90, add = NULL, ...){
  
  if(is.null(out) & formals(iv.reg)$Moran){
    iv.moran <- iv.reg(y = var, Model = Model[[3]], data = data)
    data$S   <- fitted(iv.moran$Fit.moran)
  }
  
  if(!is.null(out)){
    data$S <- out  
  }
  
  start <- min(ymd(colnames(d)))
  int   <- c(start, start + W)
  
  iv.time <- function(i){
    time <- int + i*K
    t <- which(ymd(colnames(d)) < time[2] & ymd(colnames(d)) >= time[1])
    data$y <- apply(as.matrix(d[, t]), 1, sum)
    model <- update(Model[[3]], z(y) ~ . )
    if(!is.null(data$S)) model <- update(Model[[3]], z(y) ~ . + S)
    out <- iv.reg(y = NULL, Model = model, data = data, Moran = FALSE)$ivm
    empty <- sum(data$y > 0) <= 1
    list(time = time, ivreg = out, empty = empty)
  }
  
  # How many intervals in the data?
  N <- round((ymd(last(colnames(d))) - ymd(first(colnames(d))))[[1]]/K)
  
  out <- lapply(1:N, iv.time)
  
  f <- function(x){
    w <- grep("famine", names(coefficients((x$ivreg))))
    c(coefficients(x$ivreg)[w], confint(x$ivreg)[w,])*(1 - x$empty)
  }
  
  Y <- do.call("rbind", lapply(out, f))
  
  p <- function(...){
    par(mar = c(3.2, 2.2, 2, 0.1))
    ee <- unlist(lapply(out, function(x) x$empty))
    plot(NULL, xaxt = "n", ylim = range(c(Y)), xlab = "", ylab = "IV estimate", las = 2, xlim = c(1,length(out)), ...)
    if(!is.null(add)){
      x <- sapply(ymd(add$dates), function(z) which.min(abs(difftime(do.call("c", lapply(out, function(x) x$time[1])),z,units='days'))))
      abline(v = x, col =  adjustcolor(add$colors, alpha.f = 0.25), lwd = 20)
    }
    points(1:length(out), Y[,1], type = "o", pch =  ifelse(ee, 1, 16), cex = 1.5)
    #abline(h = mean(Y[,1]), col = "dark blue", lty = 3, lwd = 3)
    abline(h = 0, lty = 3, lwd = 3)
    segments(1:length(out), Y[,2], 1:length(out), Y[,3],  col = adjustcolor("black", alpha.f = 3/4), xlab = "", lwd = 3)
    x <- lapply(out, function(x) x$time)
    x <- do.call("c", lapply(x, function(x) x[1]))
    x <- as.yearmon(x)
    x <- gsub(" ", "-", x)
    w <- seq(1, length(x), round(N/10))
    axis(1, at = w, x[w], las = 1)
  }
  
  p(...)
  
}

# ANTI-SOVIET PROTESTS UNTIL THE END OF 1991
d <- get(load("data_adm2_1933_na3_ProtestDay.RData"))
d <- subset(d, !ID%in%as.character(c(244, 245, 246, 248, 249, 250)))
d <- d@data[, grep(paste(y[3], "\\d{8}$", sep = "_"), colnames(d@data))]
colnames(d) <- ymd(strsplitm(colnames(d), "_", 3))
d <- d[, which(apply(d, 2, mean) > 0)]
d <- d[, ymd(colnames(d)) <= ymd("1991-12-31")]
roll.PRT <- roll.daily(var = y[3], d = d, main = "Anti-Soviet protests", K = 30, W = 90)

# ANTI-YANUKOVYCH PROTESTS
d <- get(load("data_adm2_1933_na3_MaidanDay.RData"))
d <- subset(d, !ID%in%as.character(c(244, 245, 246, 248, 249, 250)))
d <- d@data[, grep(paste(y[5], "\\d{8}$", sep = "_"), colnames(d@data))]
colnames(d) <- ymd(strsplitm(colnames(d), "_", 3))
d <- d[, which(apply(d, 2, mean) > 0)]
roll.MAIDAN <- roll.daily(d = d, var = y[5], main = "Anti-Yanukovych Protests")

############################################################################
# Figure A11.8 (Appendix): Soldiers' birth locations
############################################################################
# Load data
map0 <- main_data
load("PNdata_UKR.RData")

# Down-sample & convert
pn.samp <- pn.data[sample(1:nrow(pn.data),500000,replace=FALSE),]
pn.samp <- SpatialPointsDataFrame(coords=data.frame(LONG=pn.samp$LONG,LAT=pn.samp$LAT),data = pn.samp,proj4string = CRS(proj4string(map0)))
pn.samp <- pn.samp[map0,]

# Plot
par(mar=c(0,0,0,0))
plot(map0,border="white",col="lightblue")
points(pn.samp$LONG,pn.samp$LAT,col=rgb(0,0,0,.25),pch=1,cex=.4)
plot(map0,border="white",col=NA,add=T,lwd=.8)
legend("topright",pch=1,legend="Soldier's birth location",bty="n",cex=1.5)

############################################################################
## Table A11.15: IV RESULTS FOR WWII BATTLEFIELD BEHAVIORS
############################################################################

# Create variables
w_dis <- paste(c("WWII_DISCHARGE_DESERT", "WWII_DISCHARGE_DEFECT", "WWII_DISCHARGE_TREASON", "WWII_DISCHARGE_MIA", "WWII_DISCHARGE_POW"), "PROP0", sep = "_")
data$WWII_DISLOYALTY <- apply(data@data[, w_dis], 1, sum)
w_sac <- paste(c("WWII_DISCHARGE_KIA", "WWII_DISCHARGE_WIA"), "PROP0", sep = "_")
data$WWII_SACRIFICE <- apply(data@data[, w_sac], 1, sum)
out.ww2_battlefield  <- Iv.reg(y = c("WWII_DISLOYALTY", "WWII_SACRIFICE"), Model = Model[[3]], data = data)[[1]]
temp(out.ww2_battlefield)

############################################################################
## Table A12.16:  Famine mortality and post-war Lenin statues 
############################################################################

data$LENIN_PROP <- data$LENIN_SUM/data$C1926_TOTAL
out.lenin <- Iv.reg(c("LENIN_SUM", "LENIN_PROP"), Model = Model[[3]], data = data)
temp(out.lenin[[1]])

############################################################################
## Table A12.17: Famine mortality and quality of candidates in the post-war elections
############################################################################

# Fn to create candidate quality variables (exclude 1937)/maybe we should remove 1937 column to start with
make_var <- function(var) apply(data@data[, grep("1937", grep(var, colnames(data@data), value = TRUE), invert = TRUE, value = TRUE)], 1, mean)

data$PARTY_MEMBER <- make_var("PARTY_MEMBER")
data$DECORATED    <- make_var("DECORATED")
data$UKRAINIAN    <- make_var("ETHNICITY_UKRAINIAN")
data$CANDIDATE_QUALITY <- make_var("CANDIDATE_QUALITY")

out.soviet.els <- Iv.reg(c("PARTY_MEMBER", "DECORATED", "UKRAINIAN", "CANDIDATE_QUALITY"), Model = Model[[3]], data = data)[[1]]
temp(out.soviet.els)

############################################################################
# Figure A12.9: Distribution of last digits in 1946-1958 elections
############################################################################

load("USSR_elections.Rdata")

last.digit <- function(x, title = ""){
  y <- as.character((x))
  y <- as.numeric(substr(y, nchar(y), nchar(y)))
  barplot(table(y), main = paste(title, "\n (p-value = ", roundr(chisq.test(table(y))$p.value, 2), ")"), xlab = "Last digit")
}

par(mfrow = c(1, 3), mar = c(2, 2, 2, 2))
last.digit(data$BALLOTS_CAST, title = "Ballots cast")
last.digit(data$BALLOTS_AGAINSTALL, title = "Ballots against")
last.digit(data$BALLOTS_CANDIDATE, title = "Ballots in favor")

#################################################################
# FFigure A13.10: Salience of famine in the parliamentary debates 
#################################################################

rada <- get(load("rada_holodomor.Rdata"))
ggplot(rada, aes(date, y))+geom_line()+xlab(label='dates')+ylab(label='value') + scale_x_date(date_breaks = "2 years", date_labels = "%Y") + xlab("") + ylab("Times Holodomor discussed") + theme_minimal() + labs(title = "Parliamentary discussions") + theme(plot.title = element_text(hjust = 0.5, color="black", size=14, face="bold"))

#################################################################
# Figure A13.11: Mentioning of the famine in Ukrainian newspapers
#################################################################

newspapers <- get(load("ukr_newspapers.Rdata"))
newspapers$date <- as.Date(as.yearmon(paste(newspapers[,2], newspapers[,1], sep = "-")))
newspapers$Newspaper <- factor(newspapers$site, labels = c("Den (Day)", "Fakty i Kommentarii", "Pravda", "Zerkalo Nedely"))
ggplot(newspapers, aes(date, y, colour = Newspaper)) + geom_line()+xlab(label='dates')+ylab(label='value') + scale_x_date(date_breaks = "2 years", date_labels = "%Y") + xlab("") + ylab("Times Holodomor mentioned") + theme_minimal() + labs(title = "Newspapers") + theme(plot.title = element_text(hjust = 0.5, color="black", size=14, face="bold")) + theme(legend.position="bottom")

#############################################################################
# Table A13.18 (Appendix) Famine mortality and post-famine ethnic composition
############################################################################
data <- main_data
y <- c("P2_BASES_K050", "AGAINSTALL", "PRT_TOP2", "PRORUSSIAN", "MAIDAN_ANTIGOV")
# Ethnic composition variables 
data$russians1939 <- with(data@data, 100*C1939_RUSSIAN/C1939_TOTAL)
data$ukrainians1939 <- with(data@data, 100*C1939_UKRAINIAN/C1939_TOTAL)
out.ethnicity_1 <- Iv.reg(c("ukrainians1939", "russians1939","C2001_UKRAINIAN", "C2001_RUSSIAN", "C2001E_UKRAINIAN", "C2001E_RUSSIAN"), Model = Model[[3]], data = data)[[1]]
temp(out.ethnicity_1)

############################################################################
# Table A13.19 (Appendix): IV estimates after adjusting for ethnic distribution 
############################################################################
out.ethnicity_adjust1 <- Iv.reg(y[3:5], Model = update(Model[[3]], . ~ . + C2001_UKRAINIAN), data = data)[[1]]
out.ethnicity_adjust2 <- Iv.reg(y[3:5], Model = update(Model[[3]], . ~ . + C2001E_UKRAINIAN), data = data)[[1]]
rbind(temp(out.ethnicity_adjust1), temp(out.ethnicity_adjust2))

############################################################################
# Figure A13.13 (Appendix): TSLS coefficients compared to average causal direct effects 
############################################################################

# Use DirectEffect for an alternative treatment of this:
# Extract first stage predicted values from out.iv object & use them manually for the second stage here:

ACDE <- function(t, Z){
  data$famine_hat <- out.iv[[3]][[t]]$iv$stage1$fitted.values
  data$Z <- data@data[, Z]
  data$y <- data@data[, y[t]]
  tsls   <- lm(formula(update(First, scale(y) ~ . + factor(NAME_1) + famine_hat), rhs = 1), data = data, weights = data$w)
  fit_first <- lm(formula(update(First, scale(y) ~ . + factor(NAME_1) + famine_hat + Z), rhs = 1), data = data, weights = data$w)
  form_main <- update(Formula(update(formula(fit_first), . ~ . - Z)), . ~ . | Z)
  direct <- sequential_g(formula = form_main,
                         first_mod = fit_first,
                         data = data, weights = data$w)
  data.frame(var = y[t], Z = Z, type = c("TSLS", "ACDE"), do.call("rbind", lapply(list(tsls, direct), function(x) c(coefficients(x)["famine_hat"], confint(x, "famine_hat")))))
}

out <- do.call("rbind", lapply(3:5, function(t) do.call("rbind", lapply(c("C2001_UKRAINIAN", "C2001E_UKRAINIAN"), function(Z) ACDE(t, Z)))))
out$var <- factor(out$var, labels = strsplitm(labels[3:5], "\\(", 1))
out$Z   <- factor(out$Z, labels = c("Mediator: \n Ukrainian language in 2001", "Mediator: \n  Ukrainian ethnicity in 2001"))
ggplot(out, aes(type, famine_hat)) + 
  geom_hline(yintercept=0, lty=3, lwd=.5, colour="grey50") +
  geom_errorbar(aes(ymin=V2, ymax=V3), 
                lwd=1, colour="blue", width=0) +
  geom_point(size=4, pch=21, fill="black") +
  theme_bw() + coord_flip() + facet_grid(Z ~ var) + xlab("") + ylab("")


#############################################################
# REPLICATION OF RESULTS THAT USE SURVEY DATA
#############################################################
# Prepare survey data
survey <- get(load("survey_data.Rdata"))
# Merge with rayon data
data <- main_data
survey <- merge(survey, data, by=c("NAME_2_1933", "NAME_1_1933"), all.x=T, all.y=F)
# Index of opposition to Russia
X.opp <- survey[, c("opinion_support_dnrlnr", "opinion_support_ukrarmy", "opinion_support_rsector", "opinion_support_rusarmy")]
X.opp$opinion_support_dnrlnr <- 6 - X.opp$opinion_support_dnrlnr
X.opp$opinion_support_rusarmy <- 6 - X.opp$opinion_support_rusarmy 
survey$y <- survey$antirus_index <- apply(X.opp, 1, mean, na.rm = TRUE)
# Scaled DNR/LNR control
survey$DNR <- survey$CONTROL_DAYS/max(survey$CONTROL_DAYS)

############################################################################
# Table A2.2 (Appendix): Summary statistics
############################################################################
varz <- c("ex_holod_die_any_nn","antirus_index","opinion_support_ukrarmy","opinion_support_ukrarmy_d","opinion_support_rsector","opinion_support_rsector_d","opinion_support_rusarmy","opinion_support_rusarmy_d","opinion_support_dnrlnr","opinion_support_dnrlnr_d","opinion_support_proukr_d","opinion_support_prorus_d","history_holod_genoc","history_holod_genoc_d","history_holod_nature","history_holod_nature_d","demo_age","demo_job","demo_income_nn","demo_educ_high","demo_male","demo_married","demo_language_ukrainian","demo_household_adults")
labz <- c("Descendant of Holodomor victim","Opposition to pro-Russian groups","Support Ukrainian Army (Likert)","Support Ukrainian Army (dummy)","Support Right Sector (Likert)","Support Right Sector (dummy)","Support Russian Army (Likert)","Support Russian Army (dummy)","Support DNR/LNR (Likert)","Support DNR/LNR (dummy)","Support pro-Ukrainian (dummy)","Support pro-Russian (dummy)","Holodomor: due to state policy (Likert)","Holodomor: due to state policy (dummy)","Holodomor: due to natural causes (Likert)","Holodomor: due to natural causes (dummy)","Age","Profession (categories)","Income (categories)","Higher education","Male","Married","Ukrainian-speaking","Number of other adults in household")

# Output
sumtab <- sumstat(X=survey,varz = varz,labz=labz,meancount = T)
xtable(sumtab[,1:4],caption = "Summary statistics: survey data",label = "tab:survey_sumstat")

#############################################################
# Table 2: WEATHER AND ATTRIBUTION OF FAMINE’S CAUSES IN SURVEY
#############################################################

# Regressions (SE's clustered by raion)
survey_Model <- Formula(history_holod_genoc ~ W_BMA_averaged | 0 | 0 | NAME_2_1933)
out <- list(
  felm(survey_Model, data = survey),
  felm(update(survey_Model, history_holod_nature ~ .), data = survey)
)

matrix(do.call("cbind", reg_latex(out, var = "W_BMA_averaged"))[,-c(1, 3)], nrow = 2, dimnames = list(c("Weather adversity in 1931-32", "Observations"), c("\\multicolumn{1}{c}{Holodomor was genocide}", "\\multicolumn{1}{c}{Holodomor had natural causes}")))

###################################################################
# Table 4: FAMILY-LEVEL EFFECTS OF FAMINE LOSSES BY COERCION THREAT
###################################################################
out <- list(
  felm(y ~ ex_holod_die_anyd | 0 | 0 | NAME_2_1933, data = survey),
  felm(y ~ ex_holod_die_anyd*DNR | 0 | 0 | NAME_2_1933, data = survey),
  felm(y ~ ex_holod_die_anyd*DNR | 0 | 0 | NAME_2_1933, data = survey, subset = NAME_1!="Kharkiv")
)

tab <- Reduce(function(...) merge(..., by = "var", all=T), reg_latex(out))[-1,]
tab <- tab[match(c("ex_holod_die_anyd", "ex_holod_die_anyd:DNR", "DNR", "Observations"), tab$var), -1]
colnames(tab) <- c("\\multicolumn{1}{c}{Full sample}", "\\multicolumn{1}{c}{Full sample}", "\\multicolumn{1}{c}{Conflict region}")
rownames(tab) <- c("Family losses", "Family losses $\\times$  DNR/LNR", "DNR/LNR", "Observations")
tab

###################################################################
# Table A9.12: Family-level effects of famine losses by coercion threat
###################################################################

survey_Model <- Formula(y ~ ex_holod_die_anyd*DNR | 0 | 0 | NAME_2_1933)
out <- list(
  felm(update(survey_Model, opinion_support_dnrlnr  ~ .), data = survey, subset = NAME_1!="Kharkiv"),
  felm(update(survey_Model, opinion_support_rusarmy  ~ .), data = survey, subset = NAME_1!="Kharkiv"),
  felm(update(survey_Model, opinion_support_rsector  ~ .), data = survey, subset = NAME_1!="Kharkiv"),
  felm(update(survey_Model, opinion_support_ukrarmy  ~ .), data = survey, subset = NAME_1!="Kharkiv"),
  felm(y ~ ukrainian + russian + rural + FOREST + I(1 - INDUSTRY_NONE) + AGRO_SHORT + log(C1926_TOTAL) + ns(DIST2RU, 3) + ex_holod_die_anyd*DNR | NAME_1 | 0 | NAME_2_1933, data = survey, subset = NAME_1!="Kharkiv")
)

tab <- Reduce(function(...) merge(..., by = "var", all=T), reg_latex(out))[-1,]
tab <- tab[match(c("ex_holod_die_anyd", "ex_holod_die_anyd:DNR", "DNR", "Observations"), tab$var), -1]
colnames(tab) <- c("\\multicolumn{1}{c}{DNR/LNR}", "\\multicolumn{1}{c}{Russian army}", "\\multicolumn{1}{c}{Right sector}", "\\multicolumn{1}{c}{Ukrainian army}", "\\multicolumn{1}{c}{Index}")
rownames(tab) <- c("Family losses", "Family losses $\\times$ DNR/LNR", "DNR/LNR", "Observations")
tab

###################################################################
# Figure A9.4: Differences in the effects of famine losses by DNR/LNR controlled areas using cut-offs
###################################################################

f <- function(x){
  survey$DNR <- I(survey$CONTROL_DAYS > x)
  Model <- y ~ DNR + ex_holod_die_anyd:I(DNR) | 0 | 0 | NAME_2_1933
  out <- felm(Model, data = survey, subset = NAME_1!="Kharkiv")
  w <- grep("ex_holod_die_anyd", names(coefficients(out)))
  data.frame(x, b = coefficients(out)[w], confint(out)[w, ])
}

out <- data.frame(do.call("rbind", lapply(sort(unique(survey$CONTROL_DAYS))[1:10], f)))
out$DNR <- grepl("TRUE", rownames(out))
legend.title <- "More than $x$ days \n under DNR/LNR control"
ggplot(out,  aes(colour = DNR, shape = DNR)) + geom_pointrange(aes(x = x, y = b, ymin=X2.5.., ymax=X97.5..), lwd = 1, position = position_dodge(width = 1/2)) + theme_bw() + xlab("Cut-off ($x$)") + ylab("Marginal effect of family losses") + scale_x_continuous(breaks = unique(out$x)) + theme(panel.grid.minor = element_blank()) + labs(shape=legend.title, colour=legend.title) +  scale_color_manual(values = c("black", "grey"))

###################################################################
# Figure A9.5: The marginal effect of family losses in famine
###################################################################
dsurvey <- subset(survey, NAME_1 !="Kharkiv")
out.gam <- gam(y ~ ex_holod_die_anyd + s(CONTROL_DAYS, by = factor(ex_holod_die_anyd), k = 3), data = dsurvey)
z_days <- sort(unique(dsurvey$CONTROL_DAYS))
X.pred <- function(x) predict(out.gam, newdata = data.frame(ex_holod_die_anyd = x, CONTROL_DAYS = z_days), type = "lpmatrix")
b <- rmnorm(1000, coefficients(out.gam), varcov = cluster.vcov(out.gam, dsurvey$NAME_2_1933))
y.hat <- X.pred(1)%*%t(b) - X.pred(0)%*%t(b)

plot(z_days, apply(y.hat, 1, mean), type = "o", pch = 16, ylim = c(-.25, .65), xlab = "Days under DNR/LNR control", ylab = "Marginal effect of having family losses")
abline(h = 0, lty = 3)
segments(z_days, apply(y.hat, 1, quantile, 0.025), z_days, apply(y.hat, 1, quantile, 0.975), lwd = 1.5)

###################################################################
# Figure A9.6: Sensitivity of estimates to attrition
###################################################################

p <- function(b){
  e <- seq(0, .8, length = 100)
  y <- cbind(b[1], b[2], b[2]/(1-e))
  matplot(e, y, type = "l", xlab = "{\\bf Attrition rate}", ylab = "{\\bf Marginal effect}", lwd = 3, col = "black", xaxt = "n", xaxs = "i")
  axis(1, seq(-1, 2, by = 0.1))
  text(e[30], b[1], "Effect of famine outside DNR/LNR", pos = 3)
  text(e[60], b[2], "Effect of famine inside DNR/LNR (without adjusting for attrition)", pos = 3)
  text(e[80], b[2]/(1-e[80]), "Effect of famine inside DNR/LNR \n adjusted for attrition", pos = 2)
}

p(b = f(1)[,2])

###################################################################
# Table A14.20: Opposition to Russian separatism as a function....
###################################################################
out <- list(
  felm(y ~ famine | 0 | 0 | NAME_2_1933, data = survey),
  felm(y ~ ukrainian + russian + rural + FOREST + I(1 - INDUSTRY_NONE) + AGRO_SHORT + log(C1926_TOTAL) + ns(DIST2RU, 3) + famine | NAME_1 | 0 | NAME_2_1933, data = survey),
  felm(y ~ famine + ex_holod_die_anyd | 0 | 0 | NAME_2_1933, data = survey),
  felm(y ~ ukrainian + russian + rural + FOREST + I(1 - INDUSTRY_NONE) + AGRO_SHORT + log(C1926_TOTAL) + ns(DIST2RU, 3) + ex_holod_die_anyd + famine | NAME_1 | 0 | NAME_2_1933, data = survey),
  felm(y ~ ukrainian + russian + rural + FOREST + I(1 - INDUSTRY_NONE) + AGRO_SHORT + log(C1926_TOTAL) + ns(DIST2RU, 3) + ex_holod_die_anyd*famine | NAME_1 | 0 | NAME_2_1933, data = survey)
)

tab <- Reduce(function(...) merge(..., by = "var", all=T), reg_latex(out))[-1,]
tab <- tab[match(c("famine", "ex_holod_die_anyd", "ex_holod_die_anyd:famine", "Observations"), tab$var), -1]
colnames(tab) <- c("\\multicolumn{1}{c}{(1)}", "\\multicolumn{1}{c}{(2)}", "\\multicolumn{1}{c}{(3)}", "\\multicolumn{1}{c}{(4)}", "\\multicolumn{1}{c}{(5)}")
rownames(tab) <- c("Famine (rayon-level)", "Family losses", "Interaction", "Observations")
tab

############################################################################
# Table A9.13 (Appendix): Exact matching estimates
############################################################################

## Load data
survey <- get(load("survey_data_d.Rdata"))

# Merge with instrumented famine exposure
data <- main_data
data$weather_bma_high1 <- 1*(data$W_BMA_averaged>median(data$W_BMA_averaged,na.rm=T))
data$MATCH <- paste0(data$NAME_1,"_",data$NAME_2_1933)
survey$MATCH <- paste0(survey$NAME_1,"_",survey$NAME_2_1933)
data0 <- data@data[,c("MATCH","W_BMA_averaged","weather_bma_high1")]
survey <- merge(survey,data0,by="MATCH",all.x=T,all.y=F)
survey$MATCH <- NULL
survey$weather_bma_high2 <- 1*(survey$W_BMA_averaged>median(survey$W_BMA_averaged,na.rm=T))

## Pre-famine variables
survey$C1926_UKRAINIAN_PROP <- survey$C1926_UKRAINIAN/survey$C1926_TOTAL
survey$C1926_RUR_PROP <- survey$C1926_TOTAL_RUR/survey$C1926_TOTAL
survey$INDUSTRY_PR <- 1- survey$INDUSTRY_NONE
survey$LOG_C1926_TOTAL <- log(survey$C1926_TOTAL+1) 

## Create additive political index
X <- survey[,c("opinion_support_ukrarmy","opinion_support_rusarmy","opinion_support_rsector","opinion_support_dnrlnr")]
X$opinion_support_rusarmy <- 6-X$opinion_support_rusarmy
X$opinion_support_dnrlnr <- 6-X$opinion_support_dnrlnr
survey$antirus_index <- apply(X,1,function(x){mean(x,na.rm=TRUE)})

# Region dummies
dumX <- as.data.frame(model.matrix( ~ NAME_1_1933 - 1, data=survey))
names(dumX) <- gsub("Дніпропетровська","__Dnipro",names(dumX))
names(dumX) <- gsub("Донецька","__Donetsk",names(dumX))
names(dumX) <- gsub("Харківська","__Kharkiv",names(dumX))
{
  varz. <- names(dumX)
  dumX$TEMP0 <- row.names(dumX)
  survey$TEMP0 <- row.names(survey)
  survey <- merge(survey,dumX,by="TEMP0",all.x=T,all.y=F); rm(dumX)
  for(j in seq_along(varz.)){survey[is.na(survey[,varz.[j]]),varz.[j]]<-0}
}

# Specify model
dvz <- c("antirus_index","history_holod_genoc_d","history_holod_nature_d")
xvar_post <- c("NAME_1","demo_age_coarse","demo_job","demo_educ_high","demo_male","demo_married","demo_language_ukrainian","demo_household_adults")
xvar_pret <- c("NAME_1_1933","C1926_UKRAINIAN_PROP","C1926_RUR_PROP","FOREST","AGRO_SHORT","INDUSTRY_PR","LOG_C1926_TOTAL","DIST2RU","demo_male")
apply(survey[,xvar_pret],2,function(x){length(unique(x))})
xvarz <- list(
  xvar_post = xvar_post
  ,
  xvar_pret = xvar_pret
  ,
  xvar_both = unique(c(xvar_pret,xvar_post[-1]))
)
tvarz <- c("ex_holod_die_anyd")
tlabz <- c("Family exposure to famine")
condz.labz <- c("nonDNR","DNR","lowCom","highCom","Overall")
condz.list <- list(
  survey$STRAT_yesCont==0,
  survey$STRAT_yesCont==1,
  survey$FAMINE_PCT_HIGH==0,
  survey$FAMINE_PCT_HIGH==1,
  1:nrow(survey)
)


################
# Matching loop
################

# Genetic matching parameters
pop_ <- 10 #10
mg_ <- 2  #2
wg_ <- 1

# Universe of matching solutions
{
  pscore_=c(TRUE,FALSE);exact_=c(TRUE,FALSE); genetic_=c(TRUE,FALSE);BiasAdjust_=c(TRUE,FALSE); M_=c(1:3); CommonSupport_=c(FALSE); caliper_=seq(0,.25,by=.1); estimand_=c("ATT"); Weight_=c(1:2); sample_=c(TRUE,FALSE)
  match.set <- data.frame(pscore=pscore_)
  match.set2 <- data.frame(exact=exact_)
  names_ <- c(names(match.set),names(match.set2))
  match.set <- as.data.frame(cbind(match.set[rep(1:nrow(match.set),nrow(match.set2)),],match.set2[rep(1:nrow(match.set2),each=nrow(match.set)),]))
  names(match.set) <- names_
  match.set2 <- data.frame(genetic=genetic_)
  names_ <- c(names(match.set),names(match.set2))
  match.set <- as.data.frame(cbind(match.set[rep(1:nrow(match.set),nrow(match.set2)),],match.set2[rep(1:nrow(match.set2),each=nrow(match.set)),]))
  names(match.set) <- names_
  match.set2 <- data.frame(BiasAdjust=BiasAdjust_)
  names_ <- c(names(match.set),names(match.set2))
  match.set <- as.data.frame(cbind(match.set[rep(1:nrow(match.set),nrow(match.set2)),],match.set2[rep(1:nrow(match.set2),each=nrow(match.set)),]))
  names(match.set) <- names_
  match.set2 <- data.frame(M=M_)
  names_ <- c(names(match.set),names(match.set2))
  match.set <- as.data.frame(cbind(match.set[rep(1:nrow(match.set),nrow(match.set2)),],match.set2[rep(1:nrow(match.set2),each=nrow(match.set)),]))
  names(match.set) <- names_
  match.set2 <- data.frame(CommonSupport=CommonSupport_)
  names_ <- c(names(match.set),names(match.set2))
  match.set <- as.data.frame(cbind(match.set[rep(1:nrow(match.set),nrow(match.set2)),],match.set2[rep(1:nrow(match.set2),each=nrow(match.set)),]))
  names(match.set) <- names_
  match.set2 <- data.frame(caliper=caliper_)
  names_ <- c(names(match.set),names(match.set2))
  match.set <- as.data.frame(cbind(match.set[rep(1:nrow(match.set),nrow(match.set2)),],match.set2[rep(1:nrow(match.set2),each=nrow(match.set)),]))
  names(match.set) <- names_
  match.set2 <- data.frame(estimand=estimand_)
  names_ <- c(names(match.set),names(match.set2))
  match.set <- as.data.frame(cbind(match.set[rep(1:nrow(match.set),nrow(match.set2)),],match.set2[rep(1:nrow(match.set2),each=nrow(match.set)),]))
  names(match.set) <- names_
  match.set2 <- data.frame(Weight=Weight_)
  names_ <- c(names(match.set),names(match.set2))
  match.set <- as.data.frame(cbind(match.set[rep(1:nrow(match.set),nrow(match.set2)),],match.set2[rep(1:nrow(match.set2),each=nrow(match.set)),]))
  names(match.set) <- names_
  match.set2 <- data.frame(sample=sample_)
  names_ <- c(names(match.set),names(match.set2))
  match.set <- as.data.frame(cbind(match.set[rep(1:nrow(match.set),nrow(match.set2)),],match.set2[rep(1:nrow(match.set2),each=nrow(match.set)),]))
  names(match.set) <- names_
  
  # Remove redundancies
  match.set <- match.set[!(match.set$pscore&match.set$exact),]
  match.set <- match.set[!(match.set$exact&match.set$genetic),]
  match.set <- match.set[!(match.set$pscore&match.set$genetic),]
  match.set <- match.set[!(match.set$exact&match.set$caliper>0),]
  match.set <- match.set[!((!match.set$exact)&match.set$caliper==0),]
  rownames(match.set) <- 1:nrow(match.set)
}

# Loop
coarsen.qt <- TRUE
att.list1 <- lapply(seq_along(dvz),function(i){
  att.list2 <- lapply(seq_along(tvarz),function(j){
    att.list3 <- lapply(seq_along(condz.list),function(c0){
      att.list4 <- lapply(seq_along(xvarz),function(x0){print(paste("DV ",i,"/",length(dvz),"; Condition ",c0,"/",length(condz.list),"; Covariate set ",x0,"/",length(xvarz)))
        
        # Y & T
        dv <- dvz[i]
        tvar <- tvarz[j]
        xvar <- xvarz[[x0]]
        
        # create data matrix
        Y <- survey[,dv]
        if(length(tvar)==1){treat <- survey[,tvar]}
        if(length(tvar)>1){treat <- apply(survey[,tvar],1,sum)}
        X <- survey[,xvar]
        condz <- condz.list[[c0]]
        Y <- Y[condz]; treat <- treat[condz]; X <- X[condz,]
        no.na <- apply(cbind(Y,treat,X),1,function(x){sum(is.na(x))==0})
        Y <- Y[no.na]; treat <- treat[no.na]; X <- X[no.na,]
        xxvar <- xvar[grep("^NAME_|AGRO_SHORT",xvar)]
        for(xx0 in seq_along(xxvar)){
          X[,xxvar[xx0]] <- as.numeric(as.factor(X[,xxvar[xx0]]))
        }
        # if(coarsen.qt){X <- as.data.frame(apply(X,2,function(x){findInterval(x,quantile(x))}))}
        YTX <- cbind(data.frame(Y=Y,treat=treat),X)
        Y_0 <- Y; X_0 <- X; treat_0 <- treat; YTX_0 <- YTX
        # Parallelization setup
        cl  <- makeCluster(detectCores() - 1)
        clusterExport(cl, c("Match","GenMatch","MatchBalance","i","j","c0","x0","dvz","tvarz","condz.list","xvarz","match.set","tlabz","condz.labz","pop_","mg_","wg_"), envir=environment())
        # Match loop
        match_list <- parLapply(cl,1:nrow(match.set),function(mm){print(paste(i,"/",length(dvz),"; ",j,"/",length(tvarz),"; ",c0,"/",length(condz.list),"; ",x0,"/",length(xvarz),"...",mm,"/",nrow(match.set)))
          m_ <- match.set[mm,]
          # p-score model
          if(m_$pscore){
            # run
            Y <- Y_0; X <- X_0; treat <- treat_0; YTX <- YTX_0
            ps_ <- glm(paste0("treat ~ ",paste0(names(X),collapse="+")),data=YTX,family="binomial")$fitted
            mout <- Match(Y=Y, Tr=treat, X=ps_, exact=m_$exact,BiasAdjust = m_$BiasAdjust,M=m_$M,CommonSupport = m_$CommonSupport,caliper=m_$caliper,estimand = m_$estimand,Weight=m_$Weight,sample=m_$sample);
            mb <-
              tryCatch({
                MatchBalance(as.formula(paste0("treat~",paste0(names(X),collapse="+"))),data=YTX,match.out = mout,print.level = 0)
              }, error=function(e){NA})
            mb
            # summary(mout)
            mout0 <- mout; mb0 <- mb
          }
          if(m_$genetic){
            # run
            Y <- Y_0; X <- X_0; treat <- treat_0; YTX <- YTX_0
            genout <- GenMatch(Tr=treat, X=X, BalanceMatrix=X, estimand=m_$estimand, M=m_$M, pop.size=pop_, max.generations=mg_, wait.generations=wg_,print.level = 0,hard.generation.limit=TRUE)
            mout <- Match(Y=Y, Tr=treat, X=X, exact=m_$exact,BiasAdjust = m_$BiasAdjust,M=m_$M,CommonSupport = m_$CommonSupport,caliper=ifelse(m_$caliper==0,NULL,m_$caliper),estimand = m_$estimand,Weight=m_$Weight,sample=FALSE,Weight.matrix=genout);
            mb <-
              tryCatch({
                MatchBalance(as.formula(paste0("treat~",paste0(names(X),collapse="+"))),data=YTX,match.out = mout,print.level = 0)
              }, error=function(e){NA})
            mb
            # summary(mout)
            mout0 <- mout; mb0 <- mb
          }
          if((!m_$pscore)&(!m_$genetic)){
            # run
            Y <- Y_0; X <- X_0; treat <- treat_0; YTX <- YTX_0
            mout <- Match(Y=Y, Tr=treat, X=X, exact=m_$exact,BiasAdjust = m_$BiasAdjust,M=m_$M,CommonSupport = m_$CommonSupport,caliper=m_$caliper,estimand = m_$estimand,Weight=m_$Weight,sample=m_$sample);
            mb <-
              tryCatch({
                MatchBalance(as.formula(paste0("treat~",paste0(names(X),collapse="+"))),data=YTX,match.out = mout,print.level = 0)
              }, error=function(e){NA})
            mb
            mout0 <- mout; mb0 <- mb
          }
          if((m_$exact)){
            # run
            Y <- Y_0; X <- X_0; treat <- treat_0; YTX <- YTX_0
            mout <- Match(Y=Y, Tr=treat, X=X, exact=m_$exact,BiasAdjust = m_$BiasAdjust,M=m_$M,CommonSupport = m_$CommonSupport,caliper=NULL,estimand = m_$estimand,Weight=m_$Weight,sample=TRUE);
            mb <-
              tryCatch({
                MatchBalance(as.formula(paste0("treat~",paste0(names(X),collapse="+"))),data=YTX,match.out = mout,print.level = 0)
              }, error=function(e){NA})
            mout0 <- mout; mb0 <- mb
          }
          
          # Assemble output matrix
          mout <- mout0
          mb <- mb0
          
          length(mb)
          m.df0 <- data.frame(Y=dv,treat=tlabz[j],X=names(xvarz)[x0],Subgroup=condz.labz[c0],ATT= NA, SE= NA, pval=  NA, N_before=NA,T_before=NA,N_after=NA, Bal_before=NA,Bal_after=NA)
          if(length(mout)>2&sum(is.na(mb))==0){
            temp <- capture.output(summary(mout))
            m.df0 <- data.frame(Y=dv,treat=tlabz[j],X=names(xvarz)[x0],Subgroup=condz.labz[c0],ATT= mout$est, SE= mout$se, pval=  sapply(strsplit(gsub("< ","",temp[grep("^p\\.val",temp)])," "),"[",3), N_before=mout$orig.nobs,T_before=mout$orig.treated.nobs,N_after=as.character(mout$wnobs), Bal_before=mean(abs(sapply(mb$BeforeMatching,function(x){ifelse(is.finite(x$sdiff),x$sdiff/100,NA)})),na.rm=T),Bal_after=mean(abs(sapply(mb$AfterMatching,function(x){ifelse(is.finite(x$sdiff),x$sdiff/100,NA)})),na.rm=T))
          }
          if(length(mout)>2&sum(is.na(mb))>0){
            temp <- capture.output(summary(mout))
            m.df0 <- data.frame(Y=dv,treat=tlabz[j],X=names(xvarz)[x0],Subgroup=condz.labz[c0],ATT= mout$est, SE= mout$se, pval=  sapply(strsplit(gsub("< ","",temp[grep("^p\\.val",temp)])," "),"[",3), N_before=mout$orig.nobs,T_before=mout$orig.treated.nobs,N_after=as.character(mout$wnobs), Bal_before=NA,Bal_after=NA,na.rm=T)
          }
          m.df0 <- as.data.frame(cbind(m.df0,m_))
          # Output
          match.out <- rbind(m.df0)
          match.out$na.rm <- NULL
          match.out
        }); stopCluster(cl)
        match_list <- match_list[!sapply(match_list,ncol)<max(sapply(match_list,ncol))]
        match_mat <- do.call(rbind,match_list)
        match_mat
      })
      att.list4 <- att.list4[!sapply(att.list4,ncol)<max(sapply(att.list4,ncol))]
      att.mat4 <- do.call(rbind,att.list4)
      att.mat4
    })
    att.list3 <- att.list3[!sapply(att.list3,ncol)<max(sapply(att.list3,ncol))]
    att.mat3 <- do.call(rbind,att.list3)
    att.mat3
  })
  
  att.list2 <- att.list2[!sapply(att.list2,ncol)<max(sapply(att.list2,ncol))]
  att.mat2 <- do.call(rbind,att.list2)
  att.mat2
})
att.mat_all <- do.call(rbind,att.list1)
att.mat_all <- att.mat_all[!is.na(att.mat_all$Y),]

# Subset by DV
att.mat_all$pval <- as.numeric(as.character(att.mat_all$pval))
att.mat_all <- att.mat_all[!is.na(att.mat_all$pval),]
att.mat_all <- att.mat_all[att.mat_all$Y%in%c("antirus_index"),]
att.mat_all <- att.mat_all[att.mat_all$Subgroup%in%c("nonDNR","DNR","Overall"),]

# Subset by method
att.exact <- att.mat_all[att.mat_all$exact,]
att.pscore <- att.mat_all[att.mat_all$pscore,]
att.mahalanobis <- att.mat_all[(!att.mat_all$pscore)&(!att.mat_all$genetic)&(!att.mat_all$exact),]
att.genetic <- att.mat_all[att.mat_all$genetic,]

## Table: exact matching
att.tab <- att.wm(att.mat=att.exact[att.exact$X%in%c("xvar_pret"),])
att.tab$Subgroup <- gsub("DNR","DNR/LNR",att.tab$Subgroup)
att.tab$Subgroup <- gsub("nonDNR","No DNR",att.tab$Subgroup)
att.tab$zstat <- att.tab$ATT/att.tab$SE
att.tab$pval <- exp(-0.717*att.tab$zstat - 0.416*att.tab$zstat^2)
att.tab$ATT_print <- paste0(round(att.tab$ATT,3))
att.tab$SE_print <- paste0("(",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab$ATTSE_print <- paste0(round(att.tab$ATT,3)," (",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab_pret <- att.tab
att.tab <- att.wm(att.mat=att.exact[att.exact$X%in%c("xvar_both"),])
att.tab$Subgroup <- gsub("DNR","DNR/LNR",att.tab$Subgroup)
att.tab$Subgroup <- gsub("nonDNR","No DNR",att.tab$Subgroup)
att.tab$zstat <- att.tab$ATT/att.tab$SE
att.tab$pval <- exp(-0.717*att.tab$zstat - 0.416*att.tab$zstat^2)
att.tab$ATT_print <- paste0(round(att.tab$ATT,3))
att.tab$SE_print <- paste0("(",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab$ATTSE_print <- paste0(round(att.tab$ATT,3)," (",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab_post <- att.tab
att.tab <- rbind(att.tab_pret,att.tab_post)
att.mat.print <- data.frame(Subgroup=att.tab$Subgroup,ATT=att.tab$ATT_print,SE=att.tab$SE_print,Obs=att.tab$N_before,Matched=ceiling(att.tab$N_after),Imbalance_Pre=round(att.tab$Bal_before,2),Imbalance_Post=round(att.tab$Bal_after,2))

xtable(att.mat.print,caption = "\\textsc{Matched analysis: effect of famine exposure on opposition to pro-Russian groups}. Exact matching estimator. ATT: average treatment effect on the treated. SE: standard error. Imbalance: mean standardized difference between covariates in the treated and control units, before and after matching.",  label = "tab:survey_matched_1",digits=c(0,0,3,0,0,0,2,2))

############################################################################
# Table A9.14 (Appendix): Other matching estimates
############################################################################

att.tab <- att.wm(att.mat=att.pscore[att.pscore$X%in%c("xvar_pret"),])
att.tab$Subgroup <- gsub("DNR","DNR/LNR",att.tab$Subgroup)
att.tab$Subgroup <- gsub("nonDNR","No DNR",att.tab$Subgroup)
att.tab$zstat <- att.tab$ATT/att.tab$SE
att.tab$pval <- exp(-0.717*att.tab$zstat - 0.416*att.tab$zstat^2)
att.tab$ATT_print <- paste0(round(att.tab$ATT,3))
att.tab$SE_print <- paste0("(",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab$ATTSE_print <- paste0(round(att.tab$ATT,3)," (",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab_pscore <- att.tab

att.tab <- att.wm(att.mat=att.mahalanobis[att.mahalanobis$X%in%c("xvar_pret"),])
att.tab$Subgroup <- gsub("DNR","DNR/LNR",att.tab$Subgroup)
att.tab$Subgroup <- gsub("nonDNR","No DNR",att.tab$Subgroup)
att.tab$zstat <- att.tab$ATT/att.tab$SE
att.tab$pval <- exp(-0.717*att.tab$zstat - 0.416*att.tab$zstat^2)
att.tab$ATT_print <- paste0(round(att.tab$ATT,3))
att.tab$SE_print <- paste0("(",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab$ATTSE_print <- paste0(round(att.tab$ATT,3)," (",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab_mahalanobis <- att.tab

att.tab <- att.wm(att.mat=att.genetic[att.genetic$X%in%c("xvar_pret"),])
att.tab$Subgroup <- gsub("DNR","DNR/LNR",att.tab$Subgroup)
att.tab$Subgroup <- gsub("nonDNR","No DNR",att.tab$Subgroup)
att.tab$zstat <- att.tab$ATT/att.tab$SE
att.tab$pval <- exp(-0.717*att.tab$zstat - 0.416*att.tab$zstat^2)
att.tab$ATT_print <- paste0(round(att.tab$ATT,3))
att.tab$SE_print <- paste0("(",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab$ATTSE_print <- paste0(round(att.tab$ATT,3)," (",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab_genetic <- att.tab

att.tab <- att.wm(att.mat=att.mat_all[att.mat_all$X%in%c("xvar_both"),])
att.tab$Subgroup <- gsub("DNR","DNR/LNR",att.tab$Subgroup)
att.tab$Subgroup <- gsub("nonDNR","No DNR",att.tab$Subgroup)
att.tab$zstat <- att.tab$ATT/att.tab$SE
att.tab$pval <- exp(-0.717*att.tab$zstat - 0.416*att.tab$zstat^2)
att.tab$ATT_print <- paste0(round(att.tab$ATT,3))
att.tab$SE_print <- paste0("(",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab$ATTSE_print <- paste0(round(att.tab$ATT,3)," (",round(att.tab$SE,3),")",c("***","**","*","'","")[findInterval(att.tab$pval,c(0,.001,.01,.05,.1))])
att.tab_all <- att.tab

att.tab <- rbind(att.tab_pscore,att.tab_mahalanobis,att.tab_genetic)

att.mat.print <- data.frame(Subgroup=att.tab$Subgroup,ATT=att.tab$ATT_print,SE=att.tab$SE_print,Obs=att.tab$N_before,Matched=ceiling(att.tab$N_after),Imbalance_Pre=round(att.tab$Bal_before,2),Imbalance_Post=round(att.tab$Bal_after,2))

xtable(att.mat.print,caption = "\\textsc{Alternative matching estimators: effect of famine exposure on opposition to pro-Russian groups}. ATT: average treatment effect on the treated. SE: standard error. Imbalance: mean standardized difference between covariates in the treated and control units, before and after matching.", label = "tab:survey_matched_2",digits=c(0,0,3,0,0,0,2,2))